package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
 * Spark SQL batch job: reads a student CSV dataset, counts the number of
 * students per class, and writes the result back out as CSV.
 *
 * Intended for cluster submission (master is taken from spark-submit, not
 * hard-coded — note the commented-out `.master("local")`).
 *
 * Optional args (backward compatible — defaults match the original paths):
 *   args(0) — input path  (default "/data/student")
 *   args(1) — output path (default "/data/clazz_num1")
 */
object Demo4Submit {
  def main(args: Array[String]): Unit = {
    // Allow paths to be overridden on the command line; fall back to the
    // original hard-coded locations so existing invocations keep working.
    val inputPath: String = if (args.length > 0) args(0) else "/data/student"
    val outputPath: String = if (args.length > 1) args(1) else "/data/clazz_num1"

    // 1. Build the Spark SQL session.
    // shuffle.partitions = 1: the dataset is small, so avoid the default 200
    // shuffle partitions (which would produce 200 tiny output files).
    val spark: SparkSession = SparkSession
      .builder()
      //.master("local")
      .config("spark.sql.shuffle.partitions", 1)
      .appName("api")
      .getOrCreate()

    import spark.implicits._
    import org.apache.spark.sql.functions._

    // Read the student data with an explicit schema (no header inference).
    val studentDF: DataFrame = spark
      .read
      .format("csv") // data format
      .option("sep", ",") // field separator
      .schema("id STRING,name STRING,age INT, sex STRING,clazz STRING") // column names and types
      .load(inputPath)

    // Count students per class.
    // BUG FIX: the alias must be applied to the *result* of count(), not to
    // the column inside it — `count($"clazz" as "num")` would name the output
    // column `count(clazz AS num)` instead of `num`.
    val clazzNumDF: DataFrame = studentDF
      .groupBy("clazz")
      .agg(count($"clazz").as("num"))

    // Save the result, overwriting any previous output at the same path.
    clazzNumDF
      .write
      .format("csv")
      .option("sep", ",")
      .mode(SaveMode.Overwrite)
      .save(outputPath)

    // Release cluster resources when the job is done.
    spark.stop()
  }
}
